### TEST: glimmav1 dataset (MArrayLM) ###

library(Glimma)
library(limma)
library(GlimmaV2)

data(lymphomaRNAseq)
rnaseq <- lymphomaRNAseq

# add lane
groups <- data.frame(genotype=rnaseq$samples$group,
                     lane= as.character(c(rep(4,5),3,3)),
                     miscCont=c(rep(4000,5),300,250),
                     miscDisc=c("blue","red",rep("green",5)))

# add libsize
groups <- rnaseq$samples$group

# fit
design <- model.matrix(~0+groups)
contrasts <- cbind(Smchd1null.vs.WT=c(-1,1))

# convert raw counts to logCPM values by automatically extracting libsizes and normalisation factors from x
vm <- voomWithQualityWeights(rnaseq, design=design)
fit <- lmFit(vm, design=design)
fit <- contrasts.fit(fit, contrasts)
fit <- eBayes(fit)
dtFit <- decideTests(fit)
# MArrayLM plot
counts <- rnaseq$counts
groups <- rnaseq$samples$group
glimmaMA(fit, counts=counts, groups=groups, width=900, height=900)
### RNASeq 123 dataset ###

library(edgeR, quietly=TRUE)
library(Mus.musculus, quietly=TRUE)
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:limma':
## 
##     plotMA
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## 
## 
library(GlimmaV2, quietly=TRUE)

# load data
files <- c("GSM1545535_10_6_5_11.txt", "GSM1545536_9_6_5_11.txt", 
   "GSM1545538_purep53.txt", "GSM1545539_JMS8-2.txt", 
   "GSM1545540_JMS8-3.txt", "GSM1545541_JMS8-4.txt", 
   "GSM1545542_JMS8-5.txt", "GSM1545544_JMS9-P7c.txt", 
   "GSM1545545_JMS9-P8c.txt")


x <- readDGE(files, columns=c(1,3))

# add sample info
samples <- substring(colnames(x), 12, nchar(colnames(x)))
colnames(x) <- samples
group <- as.factor(c("LP", "ML", "Basal", "Basal", "ML", "LP", 
                     "Basal", "ML", "LP"))
x$samples$group <- group
lane <- as.factor(rep(c("L004","L006","L008"), c(3,4,2)))
x$samples$lane <- lane

# add gene info
geneid <- rownames(x)
genes <- select(Mus.musculus, keys=geneid, columns=c("SYMBOL", "TXCHROM"), 
                keytype="ENTREZID")
## 'select()' returned 1:many mapping between keys and columns
genes <- genes[!duplicated(genes$ENTREZID),]
x$genes <- genes

# transformations from raw scale
cpm <- cpm(x)
lcpm <- cpm(x, log=TRUE)

# remove lowly expressed genes
keep.exprs <- filterByExpr(x, group=group)
x <- x[keep.exprs,, keep.lib.sizes=FALSE]


# normalising gene expression distributions
x <- calcNormFactors(x, method = "TMM")

# generate design
design <- model.matrix(~0+group+lane)
colnames(design) <- gsub("group", "", colnames(design))
## DGELRT ##

# estimate dispersion
x <- estimateDisp(x, design)
glmfit <- glmFit(x, design)
lrt <- glmLRT(glmfit, coef=5)
glimmaMA(lrt, counts=x$counts, groups=group)